package com.meta.common.utils;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * Parses HTML data delivered by the maintenance ("维保") side and extracts the image URLs
 * it contains, to make integration easier.
 *
 * <p>Requires the jsoup dependency:
 * <pre>{@code
 * <dependency>
 *   <groupId>org.jsoup</groupId>
 *   <artifactId>jsoup</artifactId>
 *   <version>1.12.1</version>
 * </dependency>
 * }</pre>
 *
 * <p>Created 2021/12/23 16:40.
 *
 * @author xieZW
 */
public class ParseHtmlUtils {

    /**
     * Path of the HTML file that is parsed when {@link #getVoLIst(boolean, File)} is called
     * with {@code file == true}. Adjust it to the local environment; nothing else in this
     * class needs to change.
     */
    static String fromFile = "E:\\ws66ds\\workspace\\file-demo\\src\\main\\resources\\config\\yellow.html";

    /** Only attribute values containing this substring are collected. */
    private static final String URL_MARKER = "pic.b";

    /**
     * Manual entry point: parses {@link #fromFile} and prints the collected URLs.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            List<String> voLIst = getVoLIst(true, null);
            System.out.println("voLIst = " + voLIst);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Collects image URLs from an HTML document.
     *
     * <p>The document is parsed as UTF-8. The {@code src} value of every {@code <img>} that
     * has one is examined first, in document order, followed by the {@code data-original}
     * value of every {@code <img>} that has one. A value is added to the result when it
     * contains {@code "pic.b"}. Duplicates are kept, so an image whose {@code src} and
     * {@code data-original} both match contributes two entries.
     *
     * @param file   when {@code true}, the file at {@link #fromFile} is parsed and
     *               {@code stream} is ignored; when {@code false}, {@code stream} is parsed
     * @param stream the HTML file to parse when {@code file} is {@code false}
     * @return a new mutable list of matching URLs; empty when nothing matches
     * @throws IOException          if the file cannot be found or read
     * @throws NullPointerException if {@code file} is {@code false} and {@code stream} is
     *                              {@code null}
     */
    public static List<String> getVoLIst(boolean file, File stream) throws IOException {
        File input = file ? new File(fromFile) : stream;
        if (input == null) {
            // Fail fast with a clear message instead of an anonymous NPE from inside jsoup.
            throw new NullPointerException("stream must not be null when file is false");
        }
        Document document = Jsoup.parse(input, "utf-8");

        List<String> list = new ArrayList<>();
        collectMatching(document.select("img[src]"), "src", list);
        collectMatching(document.select("img[data-original]"), "data-original", list);
        return list;
    }

    /**
     * Appends to {@code target} the {@code attributeKey} value of each element whose value
     * contains {@link #URL_MARKER}, echoing every hit to standard output.
     *
     * @param images       elements to inspect, visited in iteration order
     * @param attributeKey name of the attribute to read from each element
     * @param target       list that receives the matching values
     */
    private static void collectMatching(Elements images, String attributeKey, List<String> target) {
        for (Element image : images) {
            // Element.attr returns "" rather than null when the attribute has no value,
            // so the contains() check below cannot throw.
            String value = image.attr(attributeKey);
            if (value.contains(URL_MARKER)) {
                target.add(value);
                System.out.println("====otherSrc====" + value);
            }
        }
    }
}
